Подготовьте прототип модели машинного обучения для «Цифры». Компания разрабатывает решения для эффективной работы промышленных предприятий.
Модель должна предсказать коэффициент восстановления золота из золотосодержащей руды. Используйте данные с параметрами добычи и очистки.
Модель поможет оптимизировать производство, чтобы не запускать предприятие с убыточными характеристиками.
Вам нужно:
Чтобы выполнить проект, обращайтесь к библиотекам pandas, matplotlib и sklearn. Вам поможет их документация.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.impute import KNNImputer
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import make_scorer, mean_absolute_error
# Notebook-wide display configuration for pandas and seaborn.
# remove the limit on the number of displayed columns
pd.set_option('display.max_columns', None)
# (disabled) remove the limit on column width
#pd.set_option('display.max_colwidth', None)
# turn off the chained-assignment warning
pd.set_option('chained_assignment', None)
# remove the limit on the number of displayed rows
pd.set_option('display.max_rows', None)
# turn off the chained-assignment warning again, this time through the options attribute
pd.options.mode.chained_assignment = None
# format displayed floats with two decimals and a thousands separator
pd.options.display.float_format = '{:,.2f}'.format
# configure the plot style; note that 'ticks' is the last style applied
sns.set_style('darkgrid')
sns.set(rc={'figure.dpi':200, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style('ticks')
# Load the train, test and full datasets. The absolute /datasets/ location is
# tried first; if a file is not there, all three are read from the current
# working directory instead.
try:
    gold_train = pd.read_csv('/datasets/gold_recovery_train_new.csv')
    gold_test = pd.read_csv('/datasets/gold_recovery_test_new.csv')
    gold_full = pd.read_csv('/datasets/gold_recovery_full_new.csv')
except FileNotFoundError:
    # Only a missing file triggers the fallback, so parse errors and
    # interrupts are no longer silently swallowed.
    gold_train = pd.read_csv('gold_recovery_train_new.csv')
    gold_test = pd.read_csv('gold_recovery_test_new.csv')
    gold_full = pd.read_csv('gold_recovery_full_new.csv')

# Treat negative infinity as a missing value in the train and test sets.
# NOTE(review): gold_full is left exactly as loaded - confirm this is intended.
gold_train = gold_train.replace(float("-inf"), np.nan)
gold_test = gold_test.replace(float("-inf"), np.nan)
# Show the (rows, columns) shape of the train, test and full datasets
tuple(frame.shape for frame in (gold_train, gold_test, gold_full))
((14149, 87), (5290, 53), (19439, 87))
# Count fully duplicated rows in the train, test and full datasets
tuple(frame.duplicated().sum() for frame in (gold_train, gold_test, gold_full))
(0, 0, 0)
def percentage_missing_values(a, b, c, limit=100):
    """Print the percentage of missing cells in each of three dataframes.

    Args:
        a: DataFrame reported under the label 'gold_train'.
        b: DataFrame reported under the label 'gold_test'.
        c: DataFrame reported under the label 'gold_full'.
        limit: Threshold in percent. An extra warning line is printed for
            every frame whose missing share is strictly greater than it.

    Returns:
        None. The report is written to standard output.
    """
    labelled_frames = ((a, 'gold_train'), (b, 'gold_test'), (c, 'gold_full'))
    for frame, label in labelled_frames:
        # DataFrame.size is rows * columns; it replaces np.product, which
        # was removed in NumPy 2.0.
        total_count = frame.size
        missing_count = int(frame.isna().sum().sum())
        # An empty frame has no cells, so report 0% instead of dividing by zero.
        missing_percentage = (missing_count / total_count) * 100 if total_count else 0.0
        print(f'Процент пропущенных значений в датафрейме {label}: {missing_percentage:.2f}%')
        if missing_percentage > limit:
            print(f'Внимание! Превышен лимит пропущенных значений ({limit}%).')
# Print the missing-value percentage for the train, test and full datasets
percentage_missing_values(gold_train, gold_test, gold_full)
Процент пропущенных значений в датафрейме gold_train: 0.33% Процент пропущенных значений в датафрейме gold_test: 0.03% Процент пропущенных значений в датафрейме gold_full: 0.26%
Проведём разведочный анализ каждого датасета по отдельности.
Начнём с датасета gold_train.
# Preview the first rows of the training set
gold_train.head()
date | final.output.concentrate_ag | final.output.concentrate_pb | final.output.concentrate_sol | final.output.concentrate_au | final.output.recovery | final.output.tail_ag | final.output.tail_pb | final.output.tail_sol | final.output.tail_au | primary_cleaner.input.sulfate | primary_cleaner.input.depressant | primary_cleaner.input.feed_size | primary_cleaner.input.xanthate | primary_cleaner.output.concentrate_ag | primary_cleaner.output.concentrate_pb | primary_cleaner.output.concentrate_sol | primary_cleaner.output.concentrate_au | primary_cleaner.output.tail_ag | primary_cleaner.output.tail_pb | primary_cleaner.output.tail_sol | primary_cleaner.output.tail_au | primary_cleaner.state.floatbank8_a_air | primary_cleaner.state.floatbank8_a_level | primary_cleaner.state.floatbank8_b_air | primary_cleaner.state.floatbank8_b_level | primary_cleaner.state.floatbank8_c_air | primary_cleaner.state.floatbank8_c_level | primary_cleaner.state.floatbank8_d_air | primary_cleaner.state.floatbank8_d_level | rougher.calculation.sulfate_to_au_concentrate | rougher.calculation.floatbank10_sulfate_to_au_feed | rougher.calculation.floatbank11_sulfate_to_au_feed | rougher.calculation.au_pb_ratio | rougher.input.feed_ag | rougher.input.feed_pb | rougher.input.feed_rate | rougher.input.feed_size | rougher.input.feed_sol | rougher.input.feed_au | rougher.input.floatbank10_sulfate | rougher.input.floatbank10_xanthate | rougher.input.floatbank11_sulfate | rougher.input.floatbank11_xanthate | rougher.output.concentrate_ag | rougher.output.concentrate_pb | rougher.output.concentrate_sol | rougher.output.concentrate_au | rougher.output.recovery | rougher.output.tail_ag | rougher.output.tail_pb | rougher.output.tail_sol | rougher.output.tail_au | rougher.state.floatbank10_a_air | rougher.state.floatbank10_a_level | rougher.state.floatbank10_b_air | rougher.state.floatbank10_b_level | rougher.state.floatbank10_c_air | rougher.state.floatbank10_c_level | rougher.state.floatbank10_d_air | 
rougher.state.floatbank10_d_level | rougher.state.floatbank10_e_air | rougher.state.floatbank10_e_level | rougher.state.floatbank10_f_air | rougher.state.floatbank10_f_level | secondary_cleaner.output.tail_ag | secondary_cleaner.output.tail_pb | secondary_cleaner.output.tail_sol | secondary_cleaner.output.tail_au | secondary_cleaner.state.floatbank2_a_air | secondary_cleaner.state.floatbank2_a_level | secondary_cleaner.state.floatbank2_b_air | secondary_cleaner.state.floatbank2_b_level | secondary_cleaner.state.floatbank3_a_air | secondary_cleaner.state.floatbank3_a_level | secondary_cleaner.state.floatbank3_b_air | secondary_cleaner.state.floatbank3_b_level | secondary_cleaner.state.floatbank4_a_air | secondary_cleaner.state.floatbank4_a_level | secondary_cleaner.state.floatbank4_b_air | secondary_cleaner.state.floatbank4_b_level | secondary_cleaner.state.floatbank5_a_air | secondary_cleaner.state.floatbank5_a_level | secondary_cleaner.state.floatbank5_b_air | secondary_cleaner.state.floatbank5_b_level | secondary_cleaner.state.floatbank6_a_air | secondary_cleaner.state.floatbank6_a_level | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-01-15 00:00:00 | 6.06 | 9.89 | 5.51 | 42.19 | 70.54 | 10.41 | 0.90 | 16.90 | 2.14 | 127.09 | 10.13 | 7.25 | 0.99 | 8.55 | 10.39 | 19.53 | 34.17 | 14.94 | 2.53 | 7.48 | 2.11 | 1,549.78 | -498.91 | 1,551.43 | -516.40 | 1,549.87 | -498.67 | 1,554.37 | -493.43 | 41,885.71 | 3,481.78 | 3,520.34 | 2.84 | 6.10 | 2.28 | 523.55 | 55.49 | 36.81 | 6.49 | 11.99 | 6.01 | 11.84 | 6.01 | 11.50 | 7.10 | 28.03 | 19.79 | 87.11 | 5.01 | 0.51 | 19.15 | 1.17 | 999.71 | -404.07 | 1,603.01 | -434.72 | 1,602.38 | -442.20 | 1,598.94 | -451.29 | 1,404.47 | -455.46 | 1,416.35 | -451.94 | 14.50 | 4.69 | 8.76 | 2.61 | 25.85 | -498.53 | 23.89 | -501.41 | 23.96 | -495.26 | 21.94 | -499.34 | 14.02 | -502.49 | 12.10 | -504.72 | 9.93 | -498.31 | 8.08 | -500.47 | 14.15 | -605.84 |
1 | 2016-01-15 01:00:00 | 6.03 | 9.97 | 5.26 | 42.70 | 69.27 | 10.46 | 0.93 | 16.63 | 2.22 | 125.63 | 10.30 | 7.25 | 1.00 | 8.56 | 10.50 | 19.37 | 34.12 | 16.25 | 3.05 | 6.73 | 2.35 | 1,576.17 | -500.90 | 1,575.95 | -499.87 | 1,575.99 | -499.32 | 1,574.48 | -498.93 | 42,050.86 | 3,498.37 | 3,489.98 | 2.86 | 6.16 | 2.27 | 525.29 | 57.28 | 35.75 | 6.48 | 11.97 | 6.01 | 12.00 | 6.01 | 11.62 | 7.28 | 28.07 | 20.05 | 86.84 | 4.96 | 0.54 | 18.97 | 1.18 | 1,000.29 | -400.07 | 1,600.75 | -449.95 | 1,600.48 | -449.83 | 1,600.53 | -449.95 | 1,399.23 | -450.87 | 1,399.72 | -450.12 | 14.27 | 4.59 | 9.00 | 2.49 | 25.88 | -499.99 | 23.89 | -500.37 | 23.97 | -500.09 | 22.09 | -499.45 | 13.99 | -505.50 | 11.95 | -501.33 | 10.04 | -500.17 | 7.98 | -500.58 | 14.00 | -599.79 |
2 | 2016-01-15 02:00:00 | 6.06 | 10.21 | 5.38 | 42.66 | 68.12 | 10.51 | 0.95 | 16.21 | 2.26 | 123.82 | 11.32 | 7.25 | 0.99 | 8.60 | 10.35 | 19.17 | 33.97 | 16.49 | 3.12 | 6.47 | 2.42 | 1,601.56 | -500.00 | 1,600.39 | -500.61 | 1,602.00 | -500.87 | 1,599.54 | -499.83 | 42,018.10 | 3,495.35 | 3,502.36 | 2.95 | 6.12 | 2.16 | 530.03 | 57.51 | 35.97 | 6.36 | 11.92 | 6.20 | 11.92 | 6.20 | 11.70 | 7.22 | 27.45 | 19.74 | 86.84 | 4.84 | 0.55 | 18.81 | 1.16 | 999.72 | -400.07 | 1,599.34 | -450.01 | 1,599.67 | -449.95 | 1,599.85 | -449.95 | 1,399.18 | -449.94 | 1,400.32 | -450.53 | 14.12 | 4.62 | 8.84 | 2.46 | 26.01 | -499.93 | 23.89 | -499.95 | 23.91 | -499.44 | 23.96 | -499.90 | 14.02 | -502.52 | 11.91 | -501.13 | 10.07 | -500.13 | 8.01 | -500.52 | 14.03 | -601.43 |
3 | 2016-01-15 03:00:00 | 6.05 | 9.98 | 4.86 | 42.69 | 68.35 | 10.42 | 0.88 | 16.53 | 2.15 | 122.27 | 11.32 | 7.25 | 1.00 | 7.22 | 8.50 | 15.98 | 28.26 | 16.02 | 2.96 | 6.84 | 2.26 | 1,599.97 | -500.95 | 1,600.66 | -499.68 | 1,600.30 | -500.73 | 1,600.45 | -500.05 | 42,029.45 | 3,498.58 | 3,499.16 | 3.00 | 6.04 | 2.04 | 542.59 | 57.79 | 36.86 | 6.12 | 11.63 | 6.20 | 11.69 | 6.20 | 11.92 | 7.18 | 27.34 | 19.32 | 87.23 | 4.66 | 0.54 | 19.33 | 1.08 | 999.81 | -400.20 | 1,600.06 | -450.62 | 1,600.01 | -449.91 | 1,597.73 | -450.13 | 1,400.94 | -450.03 | 1,400.23 | -449.79 | 13.73 | 4.48 | 9.12 | 2.32 | 25.94 | -499.18 | 23.96 | -499.85 | 23.97 | -500.01 | 23.95 | -499.94 | 14.04 | -500.86 | 12.00 | -501.19 | 9.97 | -499.20 | 7.98 | -500.26 | 14.01 | -600.00 |
4 | 2016-01-15 04:00:00 | 6.15 | 10.14 | 4.94 | 42.77 | 66.93 | 10.36 | 0.79 | 16.53 | 2.06 | 117.99 | 11.91 | 7.25 | 1.01 | 9.09 | 9.99 | 19.20 | 33.04 | 16.48 | 3.11 | 6.55 | 2.28 | 1,601.34 | -498.98 | 1,601.44 | -500.32 | 1,599.58 | -500.89 | 1,602.65 | -500.59 | 42,125.35 | 3,494.80 | 3,506.68 | 3.17 | 6.06 | 1.79 | 540.53 | 56.05 | 34.35 | 5.66 | 10.96 | 6.20 | 10.96 | 6.19 | 12.41 | 7.24 | 27.04 | 19.22 | 86.69 | 4.55 | 0.52 | 19.27 | 1.01 | 999.68 | -399.75 | 1,600.21 | -449.60 | 1,600.36 | -450.03 | 1,599.76 | -449.91 | 1,401.56 | -448.88 | 1,401.16 | -450.41 | 14.08 | 4.47 | 8.87 | 2.33 | 26.02 | -500.28 | 23.96 | -500.59 | 23.99 | -500.08 | 23.96 | -499.99 | 14.03 | -499.84 | 11.95 | -501.05 | 9.93 | -501.69 | 7.89 | -500.36 | 14.00 | -601.50 |
# Print column names, non-null counts and dtypes of the training set
gold_train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 14149 entries, 0 to 14148 Data columns (total 87 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 14149 non-null object 1 final.output.concentrate_ag 14148 non-null float64 2 final.output.concentrate_pb 14148 non-null float64 3 final.output.concentrate_sol 13938 non-null float64 4 final.output.concentrate_au 14149 non-null float64 5 final.output.recovery 14149 non-null float64 6 final.output.tail_ag 14149 non-null float64 7 final.output.tail_pb 14049 non-null float64 8 final.output.tail_sol 14144 non-null float64 9 final.output.tail_au 14149 non-null float64 10 primary_cleaner.input.sulfate 14129 non-null float64 11 primary_cleaner.input.depressant 14117 non-null float64 12 primary_cleaner.input.feed_size 14149 non-null float64 13 primary_cleaner.input.xanthate 14049 non-null float64 14 primary_cleaner.output.concentrate_ag 14149 non-null float64 15 primary_cleaner.output.concentrate_pb 14063 non-null float64 16 primary_cleaner.output.concentrate_sol 13863 non-null float64 17 primary_cleaner.output.concentrate_au 14149 non-null float64 18 primary_cleaner.output.tail_ag 14148 non-null float64 19 primary_cleaner.output.tail_pb 14134 non-null float64 20 primary_cleaner.output.tail_sol 14103 non-null float64 21 primary_cleaner.output.tail_au 14149 non-null float64 22 primary_cleaner.state.floatbank8_a_air 14145 non-null float64 23 primary_cleaner.state.floatbank8_a_level 14148 non-null float64 24 primary_cleaner.state.floatbank8_b_air 14145 non-null float64 25 primary_cleaner.state.floatbank8_b_level 14148 non-null float64 26 primary_cleaner.state.floatbank8_c_air 14147 non-null float64 27 primary_cleaner.state.floatbank8_c_level 14148 non-null float64 28 primary_cleaner.state.floatbank8_d_air 14146 non-null float64 29 primary_cleaner.state.floatbank8_d_level 14148 non-null float64 30 rougher.calculation.sulfate_to_au_concentrate 14148 non-null float64 31 
rougher.calculation.floatbank10_sulfate_to_au_feed 14148 non-null float64 32 rougher.calculation.floatbank11_sulfate_to_au_feed 14148 non-null float64 33 rougher.calculation.au_pb_ratio 14149 non-null float64 34 rougher.input.feed_ag 14149 non-null float64 35 rougher.input.feed_pb 14049 non-null float64 36 rougher.input.feed_rate 14141 non-null float64 37 rougher.input.feed_size 14005 non-null float64 38 rougher.input.feed_sol 14071 non-null float64 39 rougher.input.feed_au 14149 non-null float64 40 rougher.input.floatbank10_sulfate 14120 non-null float64 41 rougher.input.floatbank10_xanthate 14141 non-null float64 42 rougher.input.floatbank11_sulfate 14113 non-null float64 43 rougher.input.floatbank11_xanthate 13721 non-null float64 44 rougher.output.concentrate_ag 14149 non-null float64 45 rougher.output.concentrate_pb 14149 non-null float64 46 rougher.output.concentrate_sol 14127 non-null float64 47 rougher.output.concentrate_au 14149 non-null float64 48 rougher.output.recovery 14149 non-null float64 49 rougher.output.tail_ag 14148 non-null float64 50 rougher.output.tail_pb 14149 non-null float64 51 rougher.output.tail_sol 14149 non-null float64 52 rougher.output.tail_au 14149 non-null float64 53 rougher.state.floatbank10_a_air 14148 non-null float64 54 rougher.state.floatbank10_a_level 14148 non-null float64 55 rougher.state.floatbank10_b_air 14148 non-null float64 56 rougher.state.floatbank10_b_level 14148 non-null float64 57 rougher.state.floatbank10_c_air 14148 non-null float64 58 rougher.state.floatbank10_c_level 14148 non-null float64 59 rougher.state.floatbank10_d_air 14149 non-null float64 60 rougher.state.floatbank10_d_level 14149 non-null float64 61 rougher.state.floatbank10_e_air 13713 non-null float64 62 rougher.state.floatbank10_e_level 14149 non-null float64 63 rougher.state.floatbank10_f_air 14149 non-null float64 64 rougher.state.floatbank10_f_level 14149 non-null float64 65 secondary_cleaner.output.tail_ag 14147 non-null float64 66 
secondary_cleaner.output.tail_pb 14139 non-null float64 67 secondary_cleaner.output.tail_sol 12544 non-null float64 68 secondary_cleaner.output.tail_au 14149 non-null float64 69 secondary_cleaner.state.floatbank2_a_air 13932 non-null float64 70 secondary_cleaner.state.floatbank2_a_level 14148 non-null float64 71 secondary_cleaner.state.floatbank2_b_air 14128 non-null float64 72 secondary_cleaner.state.floatbank2_b_level 14148 non-null float64 73 secondary_cleaner.state.floatbank3_a_air 14145 non-null float64 74 secondary_cleaner.state.floatbank3_a_level 14148 non-null float64 75 secondary_cleaner.state.floatbank3_b_air 14148 non-null float64 76 secondary_cleaner.state.floatbank3_b_level 14148 non-null float64 77 secondary_cleaner.state.floatbank4_a_air 14143 non-null float64 78 secondary_cleaner.state.floatbank4_a_level 14148 non-null float64 79 secondary_cleaner.state.floatbank4_b_air 14148 non-null float64 80 secondary_cleaner.state.floatbank4_b_level 14148 non-null float64 81 secondary_cleaner.state.floatbank5_a_air 14148 non-null float64 82 secondary_cleaner.state.floatbank5_a_level 14148 non-null float64 83 secondary_cleaner.state.floatbank5_b_air 14148 non-null float64 84 secondary_cleaner.state.floatbank5_b_level 14148 non-null float64 85 secondary_cleaner.state.floatbank6_a_air 14147 non-null float64 86 secondary_cleaner.state.floatbank6_a_level 14148 non-null float64 dtypes: float64(86), object(1) memory usage: 9.4+ MB
# Count missing values in each column of the training set
gold_train.isna().sum()
date 0 final.output.concentrate_ag 1 final.output.concentrate_pb 1 final.output.concentrate_sol 211 final.output.concentrate_au 0 final.output.recovery 0 final.output.tail_ag 0 final.output.tail_pb 100 final.output.tail_sol 5 final.output.tail_au 0 primary_cleaner.input.sulfate 20 primary_cleaner.input.depressant 32 primary_cleaner.input.feed_size 0 primary_cleaner.input.xanthate 100 primary_cleaner.output.concentrate_ag 0 primary_cleaner.output.concentrate_pb 86 primary_cleaner.output.concentrate_sol 286 primary_cleaner.output.concentrate_au 0 primary_cleaner.output.tail_ag 1 primary_cleaner.output.tail_pb 15 primary_cleaner.output.tail_sol 46 primary_cleaner.output.tail_au 0 primary_cleaner.state.floatbank8_a_air 4 primary_cleaner.state.floatbank8_a_level 1 primary_cleaner.state.floatbank8_b_air 4 primary_cleaner.state.floatbank8_b_level 1 primary_cleaner.state.floatbank8_c_air 2 primary_cleaner.state.floatbank8_c_level 1 primary_cleaner.state.floatbank8_d_air 3 primary_cleaner.state.floatbank8_d_level 1 rougher.calculation.sulfate_to_au_concentrate 1 rougher.calculation.floatbank10_sulfate_to_au_feed 1 rougher.calculation.floatbank11_sulfate_to_au_feed 1 rougher.calculation.au_pb_ratio 0 rougher.input.feed_ag 0 rougher.input.feed_pb 100 rougher.input.feed_rate 8 rougher.input.feed_size 144 rougher.input.feed_sol 78 rougher.input.feed_au 0 rougher.input.floatbank10_sulfate 29 rougher.input.floatbank10_xanthate 8 rougher.input.floatbank11_sulfate 36 rougher.input.floatbank11_xanthate 428 rougher.output.concentrate_ag 0 rougher.output.concentrate_pb 0 rougher.output.concentrate_sol 22 rougher.output.concentrate_au 0 rougher.output.recovery 0 rougher.output.tail_ag 1 rougher.output.tail_pb 0 rougher.output.tail_sol 0 rougher.output.tail_au 0 rougher.state.floatbank10_a_air 1 rougher.state.floatbank10_a_level 1 rougher.state.floatbank10_b_air 1 rougher.state.floatbank10_b_level 1 rougher.state.floatbank10_c_air 1 rougher.state.floatbank10_c_level 1 
rougher.state.floatbank10_d_air 0 rougher.state.floatbank10_d_level 0 rougher.state.floatbank10_e_air 436 rougher.state.floatbank10_e_level 0 rougher.state.floatbank10_f_air 0 rougher.state.floatbank10_f_level 0 secondary_cleaner.output.tail_ag 2 secondary_cleaner.output.tail_pb 10 secondary_cleaner.output.tail_sol 1605 secondary_cleaner.output.tail_au 0 secondary_cleaner.state.floatbank2_a_air 217 secondary_cleaner.state.floatbank2_a_level 1 secondary_cleaner.state.floatbank2_b_air 21 secondary_cleaner.state.floatbank2_b_level 1 secondary_cleaner.state.floatbank3_a_air 4 secondary_cleaner.state.floatbank3_a_level 1 secondary_cleaner.state.floatbank3_b_air 1 secondary_cleaner.state.floatbank3_b_level 1 secondary_cleaner.state.floatbank4_a_air 6 secondary_cleaner.state.floatbank4_a_level 1 secondary_cleaner.state.floatbank4_b_air 1 secondary_cleaner.state.floatbank4_b_level 1 secondary_cleaner.state.floatbank5_a_air 1 secondary_cleaner.state.floatbank5_a_level 1 secondary_cleaner.state.floatbank5_b_air 1 secondary_cleaner.state.floatbank5_b_level 1 secondary_cleaner.state.floatbank6_a_air 2 secondary_cleaner.state.floatbank6_a_level 1 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for the training set
gold_train.describe()
final.output.concentrate_ag | final.output.concentrate_pb | final.output.concentrate_sol | final.output.concentrate_au | final.output.recovery | final.output.tail_ag | final.output.tail_pb | final.output.tail_sol | final.output.tail_au | primary_cleaner.input.sulfate | primary_cleaner.input.depressant | primary_cleaner.input.feed_size | primary_cleaner.input.xanthate | primary_cleaner.output.concentrate_ag | primary_cleaner.output.concentrate_pb | primary_cleaner.output.concentrate_sol | primary_cleaner.output.concentrate_au | primary_cleaner.output.tail_ag | primary_cleaner.output.tail_pb | primary_cleaner.output.tail_sol | primary_cleaner.output.tail_au | primary_cleaner.state.floatbank8_a_air | primary_cleaner.state.floatbank8_a_level | primary_cleaner.state.floatbank8_b_air | primary_cleaner.state.floatbank8_b_level | primary_cleaner.state.floatbank8_c_air | primary_cleaner.state.floatbank8_c_level | primary_cleaner.state.floatbank8_d_air | primary_cleaner.state.floatbank8_d_level | rougher.calculation.sulfate_to_au_concentrate | rougher.calculation.floatbank10_sulfate_to_au_feed | rougher.calculation.floatbank11_sulfate_to_au_feed | rougher.calculation.au_pb_ratio | rougher.input.feed_ag | rougher.input.feed_pb | rougher.input.feed_rate | rougher.input.feed_size | rougher.input.feed_sol | rougher.input.feed_au | rougher.input.floatbank10_sulfate | rougher.input.floatbank10_xanthate | rougher.input.floatbank11_sulfate | rougher.input.floatbank11_xanthate | rougher.output.concentrate_ag | rougher.output.concentrate_pb | rougher.output.concentrate_sol | rougher.output.concentrate_au | rougher.output.recovery | rougher.output.tail_ag | rougher.output.tail_pb | rougher.output.tail_sol | rougher.output.tail_au | rougher.state.floatbank10_a_air | rougher.state.floatbank10_a_level | rougher.state.floatbank10_b_air | rougher.state.floatbank10_b_level | rougher.state.floatbank10_c_air | rougher.state.floatbank10_c_level | rougher.state.floatbank10_d_air | 
rougher.state.floatbank10_d_level | rougher.state.floatbank10_e_air | rougher.state.floatbank10_e_level | rougher.state.floatbank10_f_air | rougher.state.floatbank10_f_level | secondary_cleaner.output.tail_ag | secondary_cleaner.output.tail_pb | secondary_cleaner.output.tail_sol | secondary_cleaner.output.tail_au | secondary_cleaner.state.floatbank2_a_air | secondary_cleaner.state.floatbank2_a_level | secondary_cleaner.state.floatbank2_b_air | secondary_cleaner.state.floatbank2_b_level | secondary_cleaner.state.floatbank3_a_air | secondary_cleaner.state.floatbank3_a_level | secondary_cleaner.state.floatbank3_b_air | secondary_cleaner.state.floatbank3_b_level | secondary_cleaner.state.floatbank4_a_air | secondary_cleaner.state.floatbank4_a_level | secondary_cleaner.state.floatbank4_b_air | secondary_cleaner.state.floatbank4_b_level | secondary_cleaner.state.floatbank5_a_air | secondary_cleaner.state.floatbank5_a_level | secondary_cleaner.state.floatbank5_b_air | secondary_cleaner.state.floatbank5_b_level | secondary_cleaner.state.floatbank6_a_air | secondary_cleaner.state.floatbank6_a_level | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 14,148.00 | 14,148.00 | 13,938.00 | 14,149.00 | 14,149.00 | 14,149.00 | 14,049.00 | 14,144.00 | 14,149.00 | 14,129.00 | 14,117.00 | 14,149.00 | 14,049.00 | 14,149.00 | 14,063.00 | 13,863.00 | 14,149.00 | 14,148.00 | 14,134.00 | 14,103.00 | 14,149.00 | 14,145.00 | 14,148.00 | 14,145.00 | 14,148.00 | 14,147.00 | 14,148.00 | 14,146.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,149.00 | 14,149.00 | 14,049.00 | 14,141.00 | 14,005.00 | 14,071.00 | 14,149.00 | 14,120.00 | 14,141.00 | 14,113.00 | 13,721.00 | 14,149.00 | 14,149.00 | 14,127.00 | 14,149.00 | 14,149.00 | 14,148.00 | 14,149.00 | 14,149.00 | 14,149.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,149.00 | 14,149.00 | 13,713.00 | 14,149.00 | 14,149.00 | 14,149.00 | 14,147.00 | 14,139.00 | 12,544.00 | 14,149.00 | 13,932.00 | 14,148.00 | 14,128.00 | 14,148.00 | 14,145.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,143.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,148.00 | 14,147.00 | 14,148.00 |
mean | 5.14 | 10.13 | 9.20 | 44.00 | 66.52 | 9.61 | 2.60 | 10.51 | 2.92 | 133.32 | 8.87 | 7.32 | 0.89 | 8.20 | 9.59 | 10.11 | 32.39 | 16.30 | 3.44 | 7.53 | 3.84 | 1,608.00 | -488.78 | 1,608.61 | -489.17 | 1,608.88 | -489.61 | 1,542.19 | -483.46 | 40,382.65 | 3,456.61 | 3,253.36 | 2.37 | 8.58 | 3.52 | 474.03 | 60.11 | 36.31 | 7.87 | 11.76 | 5.85 | 11.37 | 5.89 | 11.78 | 7.66 | 28.30 | 19.44 | 82.70 | 5.57 | 0.65 | 17.88 | 1.76 | 1,124.73 | -369.46 | 1,320.71 | -464.26 | 1,299.36 | -465.05 | 1,210.34 | -465.46 | 1,090.21 | -464.92 | 1,035.49 | -464.69 | 14.28 | 5.85 | 6.94 | 4.25 | 29.61 | -502.22 | 24.91 | -503.70 | 29.24 | -478.24 | 22.66 | -488.92 | 19.99 | -478.70 | 15.49 | -460.23 | 16.78 | -483.96 | 13.06 | -483.97 | 19.58 | -506.80 |
std | 1.37 | 1.65 | 2.79 | 4.91 | 10.30 | 2.32 | 0.97 | 3.00 | 0.90 | 39.43 | 3.36 | 0.61 | 0.37 | 2.01 | 2.69 | 4.06 | 5.80 | 3.74 | 1.49 | 2.13 | 1.60 | 128.39 | 35.70 | 131.11 | 33.60 | 134.27 | 35.62 | 278.32 | 47.10 | 380,143.62 | 5,772.51 | 6,753.29 | 0.87 | 1.90 | 1.07 | 104.04 | 22.42 | 4.96 | 1.92 | 3.28 | 1.10 | 3.74 | 1.12 | 2.73 | 1.86 | 6.10 | 3.77 | 14.48 | 1.04 | 0.26 | 3.43 | 0.71 | 169.31 | 93.95 | 183.16 | 57.40 | 213.40 | 55.90 | 210.43 | 55.77 | 184.61 | 56.60 | 175.05 | 56.65 | 4.48 | 2.86 | 4.16 | 2.39 | 5.80 | 60.28 | 5.99 | 62.84 | 5.64 | 54.66 | 5.00 | 41.93 | 5.66 | 50.74 | 5.26 | 58.84 | 5.83 | 37.89 | 5.77 | 39.21 | 5.76 | 37.08 |
min | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.08 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | -798.64 | 0.01 | -740.12 | 0.02 | -799.80 | 0.01 | -799.79 | -42,235,197.37 | -486.60 | -264.98 | -0.01 | 0.01 | 0.01 | 0.01 | 9.66 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.59 | 0.01 | 10.01 | 0.02 | -0.04 | -650.20 | -0.65 | -650.26 | -0.04 | -641.00 | -0.55 | -640.52 | -1.97 | -649.44 | -2.43 | -649.88 | 0.00 | 0.00 | 0.00 | 0.00 | 0.08 | -799.61 | 0.00 | -799.87 | 0.00 | -799.61 | 0.00 | -759.18 | 0.00 | -799.71 | 0.00 | -799.89 | -0.37 | -797.14 | 0.65 | -800.01 | 0.20 | -809.40 |
25% | 4.21 | 9.30 | 7.48 | 43.28 | 62.55 | 8.00 | 1.91 | 8.81 | 2.37 | 107.01 | 6.04 | 6.96 | 0.61 | 7.11 | 8.33 | 7.49 | 30.86 | 13.87 | 2.42 | 6.33 | 2.87 | 1,595.70 | -500.29 | 1,558.96 | -500.38 | 1,549.87 | -500.60 | 1,452.68 | -500.46 | 39,994.30 | 2,527.09 | 2,512.20 | 2.00 | 7.13 | 2.78 | 420.78 | 48.97 | 34.12 | 6.60 | 9.86 | 5.12 | 9.51 | 5.20 | 10.49 | 6.85 | 26.70 | 18.43 | 79.99 | 4.92 | 0.47 | 15.69 | 1.31 | 999.80 | -499.79 | 1,199.37 | -500.18 | 1,103.10 | -500.21 | 1,059.71 | -500.36 | 997.18 | -500.25 | 900.97 | -500.48 | 12.18 | 3.98 | 3.23 | 3.15 | 25.10 | -500.25 | 22.05 | -500.27 | 24.99 | -500.18 | 19.95 | -500.11 | 14.99 | -500.63 | 11.89 | -500.15 | 11.08 | -500.36 | 8.99 | -500.11 | 14.99 | -500.75 |
50% | 4.99 | 10.30 | 8.85 | 44.87 | 67.43 | 9.48 | 2.59 | 10.51 | 2.85 | 133.02 | 8.04 | 7.29 | 0.89 | 8.23 | 9.93 | 9.73 | 33.23 | 15.80 | 3.22 | 7.71 | 3.51 | 1,601.82 | -499.91 | 1,601.82 | -499.94 | 1,601.57 | -499.87 | 1,600.17 | -499.83 | 43,684.31 | 2,975.89 | 2,899.81 | 2.25 | 8.16 | 3.42 | 499.45 | 55.37 | 37.02 | 7.65 | 11.69 | 5.95 | 11.38 | 6.00 | 11.75 | 7.76 | 29.26 | 19.95 | 85.30 | 5.72 | 0.63 | 18.02 | 1.75 | 1,001.69 | -300.18 | 1,301.37 | -499.76 | 1,300.21 | -499.68 | 1,200.74 | -499.47 | 1,050.50 | -499.61 | 1,000.05 | -499.36 | 15.36 | 5.44 | 7.30 | 3.98 | 30.03 | -499.96 | 27.02 | -500.01 | 28.02 | -499.88 | 22.04 | -499.97 | 20.00 | -499.68 | 14.98 | -499.39 | 17.93 | -499.70 | 12.00 | -499.91 | 19.98 | -500.06 |
75% | 5.86 | 11.17 | 10.49 | 46.17 | 72.35 | 11.00 | 3.24 | 11.93 | 3.43 | 159.83 | 11.52 | 7.70 | 1.10 | 9.50 | 11.31 | 13.05 | 35.33 | 18.45 | 4.25 | 8.91 | 4.49 | 1,699.72 | -499.38 | 1,700.22 | -499.39 | 1,700.46 | -498.80 | 1,699.36 | -498.48 | 47,760.41 | 3,716.36 | 3,596.53 | 2.66 | 9.92 | 4.23 | 547.33 | 66.08 | 39.42 | 9.07 | 13.61 | 6.60 | 13.50 | 6.70 | 13.43 | 8.60 | 31.74 | 21.39 | 90.17 | 6.31 | 0.79 | 19.94 | 2.19 | 1,299.51 | -299.96 | 1,449.55 | -400.43 | 1,450.35 | -400.66 | 1,344.38 | -401.05 | 1,200.05 | -400.60 | 1,100.17 | -401.01 | 17.23 | 7.80 | 10.55 | 4.88 | 34.89 | -499.59 | 28.94 | -499.76 | 34.99 | -436.92 | 25.97 | -499.76 | 24.99 | -477.47 | 20.06 | -400.04 | 21.35 | -487.71 | 17.98 | -453.19 | 24.99 | -499.54 |
max | 16.00 | 17.03 | 18.12 | 52.76 | 100.00 | 19.55 | 5.64 | 22.32 | 8.20 | 250.13 | 20.05 | 10.47 | 2.51 | 16.08 | 17.08 | 22.28 | 45.93 | 29.46 | 9.63 | 20.62 | 17.79 | 2,079.53 | -330.13 | 2,114.91 | -347.35 | 2,013.16 | -346.65 | 2,398.90 | -30.60 | 3,428,098.94 | 629,638.98 | 718,684.96 | 39.38 | 14.60 | 7.14 | 717.51 | 484.97 | 48.36 | 13.13 | 36.12 | 9.70 | 37.98 | 9.70 | 24.48 | 13.62 | 38.35 | 28.15 | 100.00 | 12.72 | 3.78 | 66.12 | 9.69 | 1,521.98 | -281.04 | 1,809.19 | -296.38 | 2,499.13 | -292.16 | 1,817.20 | -208.33 | 1,922.64 | -272.20 | 1,706.31 | -191.72 | 23.26 | 17.04 | 17.98 | 26.81 | 52.65 | -127.88 | 35.15 | -212.06 | 44.26 | -191.68 | 35.07 | -159.74 | 30.12 | -245.24 | 24.01 | -145.07 | 43.71 | -275.07 | 27.93 | -157.40 | 32.19 | -104.43 |
%%time
# Look at the distribution of the data in the gold_train dataset
gold_train.hist(figsize=(50, 40), bins=50, color='brown')
plt.show()
CPU times: user 18.5 s, sys: 850 ms, total: 19.3 s Wall time: 19.4 s
# Visualise separately the distributions of the two target features,
# 'rougher.output.recovery' and 'final.output.recovery'
target_styles = (
    ('rougher.output.recovery', 'black'),
    ('final.output.recovery', 'gold'),
)
for target_column, fill_color in target_styles:
    gold_train[target_column].hist(
        figsize=(16, 5), alpha=0.7, bins=100, color=fill_color, edgecolor='black'
    )

# Legend entries are listed in the same order the histograms were drawn
legend_labels = [
    "Эффективность обогащения чернового концентрата 'rougher.output.recovery'",
    "Эффективность обогащения финального концентрата 'final.output.recovery'",
]
plt.grid(True)
plt.legend(legend_labels)
plt.xlabel('Эффективность (коэффициент)')
plt.ylabel('Количество индексаций (замеров)')
plt.title('Распределение эффективности обогащения чернового и финального концентрата в обучающей выборке')
plt.show()
%%time
# Построим тепловую карту (хитмэп) коэффициентов корреляции Пирсона для обучающей выборки (датасет gold_recovery_train_new):
plt.figure(figsize=(50,50))
sns.heatmap(gold_train.corr(), annot=True, fmt = '.2f', vmin=-1, vmax=1, center=0, cmap='coolwarm')
plt.show()